# Notebook-wide setup: warning filter, IPython display mode, and the
# numeric / plotting libraries used by the cells below.
import warnings
# Blanket filter: every warning category is ignored from here on.
warnings.filterwarnings('ignore')
from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell rather than only the
# last one; presumably why bare expressions such as `df` appear mid-cell below.
InteractiveShell.ast_node_interactivity = 'all'
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# IPython magic (not plain Python): render matplotlib figures inline.
%matplotlib inline
# Load the KNN dataset and take a first look at it: the raw table, summary
# statistics per column, and pairwise scatter plots of every column.
# NOTE(review): the path looks like a machine-specific placeholder ('..');
# confirm the real location before running.
data_path = r'C:\Users\..\KNNData.csv'
df = pd.read_csv(data_path)
df
df.describe()
sns.pairplot(data=df)
from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler

# Compare three scalers on the feature columns (everything except the label).
# NOTE(review): each scaler is fitted on the full dataset, i.e. before the
# train_test_split performed later in this file - confirm that is acceptable.
raw_features = df.drop(columns='TARGET CLASS')

# Standardisation; this is the variant the model-building cells use.
ss = StandardScaler()
scaled_features = ss.fit_transform(raw_features)
pd.DataFrame(scaled_features).describe().round()

# Min-max scaling into the explicit (0, 1) range.
mm = MinMaxScaler(feature_range=(0, 1))
scaled_features_mm = mm.fit_transform(raw_features)
mm_frame = pd.DataFrame(scaled_features_mm)
mm_frame.describe().round()
mm_frame

# Robust scaling.
rr = RobustScaler()
scaled_features_rr = rr.fit_transform(raw_features)
pd.DataFrame(scaled_features_rr).describe().round()

# NOTE(review): the min-max frame is displayed again here, directly after the
# RobustScaler summary - confirm whether scaled_features_rr was intended.
mm_frame
sns.pairplot(mm_frame)
pd.DataFrame(scaled_features)
# Rebuild a labelled DataFrame from the standardised feature matrix and
# re-attach the target column.
#
# The feature names are derived by dropping 'TARGET CLASS' by name, mirroring
# how the matrix itself was produced (df.drop('TARGET CLASS', axis=1)), so the
# labels stay correct even if the target is not the last column of the CSV.
feature_columns = df.columns.drop('TARGET CLASS')
feature_columns
# Reuse df's index so the concat below aligns row-for-row with the target
# column instead of relying on df having a default RangeIndex.
df_final = pd.DataFrame(scaled_features, columns=feature_columns, index=df.index)
df_final
df_final = pd.concat([df_final, df['TARGET CLASS']], axis=1)
df_final
from sklearn.model_selection import train_test_split

# Separate the label from the predictors, then hold out 30% of the rows for
# evaluation; the fixed random_state keeps the split reproducible.
Y = df_final['TARGET CLASS']
X = df_final.drop(columns=['TARGET CLASS'])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=101,
)
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

# Baseline: a 1-nearest-neighbour classifier fitted on the training split and
# scored by accuracy on the held-out split.
knn1 = KNeighborsClassifier(n_neighbors=1)
knn1.fit(X=X_train, y=y_train)
pred_knn1 = knn1.predict(X=X_test)
metrics.accuracy_score(y_true=y_test, y_pred=pred_knn1)
from sklearn.model_selection import GridSearchCV
import time

# Hyper-parameter search for KNN with 5-fold cross-validation on the training
# split. The grid covers odd neighbour counts 3..15 and the Minkowski power
# parameter p (1 and 2).
k_candidates = range(3, 17, 2)
param_grid = {'n_neighbors': k_candidates, 'p': [1, 2]}
list(k_candidates)
knn = KNeighborsClassifier()
knn_gscv = GridSearchCV(knn, param_grid, cv=5, verbose=1)

# perf_counter() is a monotonic, high-resolution clock, so the measured
# duration cannot be distorted by system clock adjustments (unlike time.time()).
start_time = time.perf_counter()
knn_gscv.fit(X_train, y_train)
print(time.perf_counter() - start_time, 'Seconds')

# Best parameter combination, its mean cross-validated score, and the full
# per-candidate results table.
knn_gscv.best_params_
knn_gscv.best_score_
pd.DataFrame(knn_gscv.cv_results_)
# Plot the error rate against the number of neighbours.
# For each odd k from 3 to 15, fit a KNN classifier on the training split and
# record the fraction of misclassified rows on the test split.
# NOTE(review): the error is measured on the test split; if this curve is used
# to choose k, the test score is no longer an unbiased estimate - confirm intent.
k_values = range(3, 17, 2)  # defined once so the loop and the x-axis cannot drift apart
error_rate = []
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    pred = knn.predict(X_test)
    error_rate.append(np.mean(pred != y_test))
error_rate
plt.figure(dpi=125)
# The trailing semicolon suppresses the echoed return value in the notebook.
sns.lineplot(x=k_values, y=error_rate, marker='o', color='red');